/* This file runs a series of power analyses using the combined experts
sample vs the nonexperts. For each observed difference in proportions, for our
sample sizes, what power do we have? Additionally, create graphs of the power
curves for experts and nonexperts.
*/

* ---------------------------------------------------------------------------
* Session setup: empty memory, pin the Stata version, pick the graph scheme.
* ---------------------------------------------------------------------------
clear
version 13.1
set more off
set scheme s1mono

* Paths in this file are built from the globals main, logs, dat and output.
* They are not defined in this file, so they must be set before it is run.
cd "${main}"

* Close any log that is still open (errors ignored), then start a fresh one.
capture log close
log using "${logs}/power_analysis_experts_nonexperts.log", replace

* Timestamp the log.
display "This program was last run at: `c(current_time)' `c(current_date)'"

** Non-expert sample: treat1 == 1 respondents from phases 1, 2 and 3 **
* Each phase is trimmed to the same five variables and parked in a tempfile
* so that the three extracts can be stacked in phase order afterwards.
tempfile phase1 phase2 phase3 nonexpert

** data from phase 1 **
use "${dat}/Pilot 1.0/survey_merged.dta", clear
drop if treat1 != 1
* repeat_participant is created here with the placeholder value 2; it is
* recoded to 0 once all phases have been appended (see below).
generate repeat_participant = 2
keep gid abs_disc y repeat_participant attention
save "`phase1'"

** data from phase 2 **
use "${dat}/Phase II/survey_merged_PhaseII.dta", clear
drop if treat1 != 1
keep gid abs_disc y repeat_participant attention
save "`phase2'"

** data from phase 3 **
use "${dat}/Phase III/survey_merged_PhaseIII.dta", clear
drop if treat1 != 1
keep gid abs_disc y repeat_participant attention
save "`phase3'"

* Stack the extracts: phase 1 rows first, then phase 2, then phase 3.
use "`phase1'", clear
append using "`phase2'" "`phase3'"
*gen playersurvey_type = "RD"
*append using "`phase5'"
*replace playersurvey_type = "RK" if playersurvey_type == ""
generate expert = 0
* Turn the phase-1 placeholder (2) into 0.
replace repeat_participant = 0 if repeat_participant == 2

save "`nonexpert'"

** Expert sample, then the combined expert + non-expert file **
use "${dat}/Expert Cornell/Expert Cornell part1.dta", clear
append using "${dat}/Expert Irvine/Expert Irvine part1.dta"
* Only the Cornell and Irvine rows are in memory at this point, so they alone
* receive seminar = 1; rows appended afterwards are set to 0 further down.
generate seminar = 1
append using "${dat}/Expert Jul 2019 Pilot/Expert Jul 2019 Pilot part1.dta" ///
	"${dat}/Expert Aug 2019 Pilot/Expert Aug 2019 Pilot part1.dta" ///
	"${dat}/Expert Aug 2019 First Round/Expert Aug 2019 First Round part1.dta"
replace seminar = 0 if seminar != 1
generate expert = 1
* All-missing placeholders so the expert rows carry the same columns as the
* non-expert file appended below.
foreach placeholder in repeat_participant attention {
	generate `placeholder' = .
}

keep gid abs_disc y repeat_participant attention expert seminar

append using "`nonexpert'"

* String copy of the discontinuity magnitude: exact == comparisons on most
* decimal values are unreliable, so later code matches on this string instead.
generate abs_disc_str = string(abs_disc)

tempfile full
save "`full'"

*******************
* Power analysis RD
*******************
* For every discontinuity magnitude: take the observed mean of y among experts
* and among non-experts, together with the two group sizes, and ask
* -power twoproportions- what power those sample sizes give for a difference
* of that size.
use "`full'", clear
foreach disc in "0" ".1944" ".324" ".54" ".9" "1.5" {

	di "`disc'"

	* Work on the rows for this magnitude only; -restore- brings back the rest.
	preserve
	keep if abs_disc_str == "`disc'"
	di "Expert responses"
	sum y if expert == 1
	local expertprop = r(mean)
	local expertn = r(N)
	di "Nonexpert responses"
	sum y if expert == 0
	local nonexpertprop = r(mean)
	local nonexpertn = r(N)

	* Run the command once. -capture noisily- shows the normal output on
	* success and Stata's own error text on failure, and lets the loop continue
	* either way. The previous version ran the command twice and labelled every
	* failure as a range problem, which hid other causes such as an empty group.
	capture noisily power twoproportions `expertprop' `nonexpertprop', n1(`expertn') n2(`nonexpertn')

	if _rc != 0 {
		di "ERROR: power twoproportions failed with return code " _rc " (see the message above)"
	}

	restore

}

* Reduce the data to the mean of y in every magnitude x expert cell, then
* rebuild a numeric magnitude from the string key for use on the x-axis.
collapse (mean) y, by(abs_disc_str expert)
destring abs_disc_str, generate(abs_disc)
sort abs_disc

* Tag these cell means with big_bins = 0 and keep a copy in a tempfile; the
* copy is appended to the big-bin cell means later in this file.
generate big_bins = 0
tempfile d
save "`d'"

* One connected line per group: plot 1 = experts, plot 2 = non-experts.
twoway ///
	(connected y abs_disc if expert == 1) ///
	(connected y abs_disc if expert == 0), ///
	legend(label(1 "Experts") label(2 "Non-Experts")) ///
	ytitle("Share Reporting Discontinuity") ///
	xtitle("Discontinuity Magnitude")
graph export "${output}/experts_nonexperts_power_rd.pdf", as(pdf) replace

* Power analysis RD (QSS) vs nonexperts big bins
use "${dat}/Expert Princeton/Expert Princeton part1.dta", clear
generate expert = 1
* _merge and disc may or may not be present in the file; drop them if they are.
capture drop _merge
capture drop disc

* Append in non-experts
append using "${dat}/survey_merged_rd_estimates_micro"
* Rows that arrived through the append have no expert flag yet.
replace expert = 0 if expert == .
* Only treatment group "B" is kept, for experts and non-experts alike.
keep if playertreatment_group == "B"

* String key for the magnitude, matched exactly in the loop that follows.
generate abs_disc_str = string(abs_disc)

* For every discontinuity magnitude: take the observed mean of y among experts
* and among non-experts in the data currently in memory, together with the two
* group sizes, and ask -power twoproportions- what power those sample sizes
* give for a difference of that size.
foreach disc in "0" ".1944" ".324" ".54" ".9" "1.5" {

	di "`disc'"

	* Work on the rows for this magnitude only; -restore- brings back the rest.
	preserve
	keep if abs_disc_str == "`disc'"
	di "Expert responses"
	sum y if expert == 1
	local expertprop = r(mean)
	local expertn = r(N)
	di "Nonexpert responses"
	sum y if expert == 0
	local nonexpertprop = r(mean)
	local nonexpertn = r(N)

	* Run the command once. -capture noisily- shows the normal output on
	* success and Stata's own error text on failure, and lets the loop continue
	* either way. The previous version ran the command twice and labelled every
	* failure as a range problem, which hid other causes such as an empty group.
	capture noisily power twoproportions `expertprop' `nonexpertprop', n1(`expertn') n2(`nonexpertn')

	if _rc != 0 {
		di "ERROR: power twoproportions failed with return code " _rc " (see the message above)"
	}

	restore

}

* Reduce the data to the mean of y in every magnitude x expert cell, then
* rebuild a numeric magnitude from the string key for use on the x-axis.
collapse (mean) y, by(abs_disc_str expert)
destring abs_disc_str, generate(abs_disc)
sort abs_disc

* One connected line per group: plot 1 = experts, plot 2 = non-experts.
* The y-axis range is widened to include 0 and 1.
twoway ///
	(connected y abs_disc if expert == 1) ///
	(connected y abs_disc if expert == 0), ///
	yscale(range(0 1)) ///
	legend(label(1 "Experts") label(2 "Non-Experts")) ///
	ytitle("Share Reporting Discontinuity") ///
	xtitle("Discontinuity Magnitude")
graph export "${output}/experts_qss_nonexperts_bigbins_power_rd.pdf", as(pdf) replace

* Stack the cell means saved earlier (big_bins = 0) under the cell means now
* in memory. The rows in memory have no big_bins variable before the append,
* so after it they are the ones with big_bins missing and are flagged as 1.
append using "`d'"
replace big_bins = 1 if big_bins == .
sort abs_disc
* Numerical discontinuity identifier for merging later.
* The key is derived from the magnitude itself (0 for the smallest distinct
* abs_disc, 1 for the next, ...) rather than from the row position. Numbering
* by position (four rows per magnitude) would shift every later key as soon as
* one expert x big_bins cell is absent for some magnitude.
egen disc = group(abs_disc)
replace disc = disc - 1

tempfile collapsed
save "`collapsed'", replace

* Code to indicate whether experts-non-experts perform stat. signif. differently
* Experts and non-experts
* The data in memory are set aside with -preserve-; the matching -restore-
* comes after the small-bin results table has been saved.
preserve
use "${dat}/expert_merged_rd_estimates_micro.dta", clear
gen expert = 1
* _merge and disc may or may not be present in the file; drop them if they are.
cap drop _merge
cap drop disc

* Append in non-experts
append using "${dat}/survey_merged_rd_estimates_micro"
keep if playertreatment_group == "A"
* Rows that arrived through the append have no expert flag yet.
replace expert = 0 if expert == .

replace abs_disc = round(abs_disc,0.0001)

* Map each magnitude to a bin index 0-5 using a tolerance band around the six
* design values. The bands do not overlap, so the order of the statements
* does not matter.
cap drop disc
gen disc  = 0 if abs_disc<=0.01
replace disc = 1 if abs_disc>=0.19 & abs_disc<=0.2
replace disc = 2 if abs_disc>=0.32 & abs_disc<=0.33
replace disc = 3 if abs_disc>=0.53 & abs_disc<=0.55
replace disc = 4 if abs_disc>=0.89 & abs_disc<=0.91
* Stata ranks missing values above every number, so an unguarded ">= 1.49"
* would also put rows with missing abs_disc into the top bin.
replace disc = 5 if abs_disc>=1.49 & !missing(abs_disc)

label define disc_lab 0 "0" 1 "0.1944" 2 "0.324" 3 "0.54" 4 "0.9" 5 "1.5"
label values disc disc_lab
* One indicator per bin: d1 is the lowest bin present, and so on upward. The
* regressions that follow expect d1-d6, i.e. all six bins must be present.
tabulate disc, gen(d)

* Numeric respondent id, used as the cluster variable in the regressions.
egen player = group(pid)

* Regress y on the six bin indicators without a constant, separately for
* experts (suffix E) and non-experts (suffix N), clustering on player. Each
* coefficient and standard error is stored in a local: beta1E, se1E, ...,
* beta6N, se6N.
foreach grp in E N {
	* 1 for the expert run, 0 for the non-expert run
	local is_expert = ("`grp'" == "E")
	regress y d1 d2 d3 d4 d5 d6 if expert == `is_expert', noconstant vce(cluster player)
	forvalues num = 1/6 {
		local se`num'`grp' = _se[d`num']
		local beta`num'`grp' = _b[d`num']
	}
}

* Expert minus non-expert difference for each bin, with an interval of
* +/- 1.96 standard errors. The two standard errors are combined as
* sqrt(seE^2 + seN^2).
forvalues num = 1/6 {
	local diff`num'E = `beta`num'E' - `beta`num'N'
	local se_diff = sqrt((`se`num'E'^2) + (`se`num'N'^2))
	local cl`num'E = `diff`num'E' - 1.96*`se_diff'
	local cu`num'E = `diff`num'E' + 1.96*`se_diff'
}
					
* Swap the micro data for a six-row results table, one row per bin (0-5).
drop _all
set obs 6

generate disc = _n - 1
label values disc disc_lab

*Create cardinal discontinuity
* The k-th entry of the list is the magnitude for bin k-1.
generate discc = .
local magnitude_list 0 0.1944 0.324 0.54 0.9 1.5
forvalues bin = 0/5 {
	local pos = `bin' + 1
	local magnitude : word `pos' of `magnitude_list'
	replace discc = `magnitude' if disc == `bin'
}

* Empty result columns, filled from the stored locals below.
foreach result in betaE betaN diffE cuE clE diffSignif {
	generate `result' = .
}

* Locals are numbered 1-6 while disc runs 0-5, hence the shift by one.
forvalues num = 1/6 {
	local row = `num' - 1
	replace betaE = `beta`num'E' if disc == `row'
	replace betaN = `beta`num'N' if disc == `row'
	replace diffE = `diff`num'E' if disc == `row'
	replace cuE = `cu`num'E' if disc == `row'
	replace clE = `cl`num'E' if disc == `row'
}

* 1 when the interval [clE, cuE] lies entirely on one side of zero.
replace diffSignif = cuE < 0 | clE > 0
generate big_bins = 0
tempfile small_bins_diffs
save "`small_bins_diffs'", replace
* Bring back the data that were set aside by the earlier -preserve-.
restore

* Experts (QSS) and non-experts (Large bins)
use "${dat}/Expert Princeton/Expert Princeton part1.dta", clear
gen expert = 1
* _merge and disc may or may not be present in the file; drop them if they are.
cap drop _merge
cap drop disc

* Append in non-experts
append using "${dat}/survey_merged_rd_estimates_micro"
* Rows that arrived through the append have no expert flag yet.
replace expert = 0 if expert == .
* Treatment group "B" only; non-expert rows with phase == 3 are also excluded.
keep if (expert == 1 & playertreatment_group == "B") | (expert == 0 & playertreatment_group == "B" & phase != 3)

*Modify discontinuity variable:
replace abs_disc = round(abs_disc,0.0001)

* Map each magnitude to a bin index 0-5 using a tolerance band around the six
* design values. The bands do not overlap, so the order of the statements
* does not matter.
cap drop disc
gen disc  = 0 if abs_disc<=0.01
replace disc = 1 if abs_disc>=0.19 & abs_disc<=0.2
replace disc = 2 if abs_disc>=0.32 & abs_disc<=0.33
replace disc = 3 if abs_disc>=0.53 & abs_disc<=0.55
replace disc = 4 if abs_disc>=0.89 & abs_disc<=0.91
* Stata ranks missing values above every number, so an unguarded ">= 1.49"
* would also put rows with missing abs_disc into the top bin.
replace disc = 5 if abs_disc>=1.49 & !missing(abs_disc)

label define disc_lab 0 "0" 1 "0.1944" 2 "0.324" 3 "0.54" 4 "0.9" 5 "1.5"
label values disc disc_lab
* One indicator per bin: d1 is the lowest bin present, and so on upward. The
* regressions that follow expect d1-d6, i.e. all six bins must be present.
tabulate disc, gen(d)

* Numeric respondent id, used as the cluster variable in the regressions.
egen player = group(pid)

* Regress y on the six bin indicators without a constant, separately for
* experts (suffix E) and non-experts (suffix N), clustering on player. Each
* coefficient and standard error is stored in a local: beta1E, se1E, ...,
* beta6N, se6N.
foreach grp in E N {
	* 1 for the expert run, 0 for the non-expert run
	local is_expert = ("`grp'" == "E")
	regress y d1 d2 d3 d4 d5 d6 if expert == `is_expert', noconstant vce(cluster player)
	forvalues num = 1/6 {
		local se`num'`grp' = _se[d`num']
		local beta`num'`grp' = _b[d`num']
	}
}

* Expert minus non-expert difference for each bin, with an interval of
* +/- 1.96 standard errors. The two standard errors are combined as
* sqrt(seE^2 + seN^2).
forvalues num = 1/6 {
	local diff`num'E = `beta`num'E' - `beta`num'N'
	local se_diff = sqrt((`se`num'E'^2) + (`se`num'N'^2))
	local cl`num'E = `diff`num'E' - 1.96*`se_diff'
	local cu`num'E = `diff`num'E' + 1.96*`se_diff'
}
					
* Swap the micro data for a six-row results table, one row per bin (0-5).
drop _all
set obs 6

generate disc = _n - 1
label values disc disc_lab

*Create cardinal discontinuity
* The k-th entry of the list is the magnitude for bin k-1.
generate discc = .
local magnitude_list 0 0.1944 0.324 0.54 0.9 1.5
forvalues bin = 0/5 {
	local pos = `bin' + 1
	local magnitude : word `pos' of `magnitude_list'
	replace discc = `magnitude' if disc == `bin'
}

* Empty result columns, filled from the stored locals below.
foreach result in betaE betaN diffE cuE clE diffSignif {
	generate `result' = .
}

* Locals are numbered 1-6 while disc runs 0-5, hence the shift by one.
forvalues num = 1/6 {
	local row = `num' - 1
	replace betaE = `beta`num'E' if disc == `row'
	replace betaN = `beta`num'N' if disc == `row'
	replace diffE = `diff`num'E' if disc == `row'
	replace cuE = `cu`num'E' if disc == `row'
	replace clE = `cl`num'E' if disc == `row'
}

* 1 when the interval [clE, cuE] lies entirely on one side of zero.
replace diffSignif = cuE < 0 | clE > 0
generate big_bins = 1
* Combine the two results tables: the big-bin rows in memory (big_bins = 1)
* plus the small-bin rows saved earlier (big_bins = 0).
append using "`small_bins_diffs'"
* Use the non-experts to indicate statistically significant differences:
* every row of the results table is tagged expert = 0, so after the merge the
* diffSignif flag sits on the non-expert rows of the collapsed cell means.
gen expert = 0

* Attach the cell means (y, abs_disc) saved in `collapsed'. Expert rows exist
* only in the using file, so they come in with diffSignif missing.
merge 1:1 expert big_bins disc using "`collapsed'"

* Combined figure. Plots 1-2: expert markers (small / large bins).
* Plots 3-4: non-expert markers where diffSignif == 1 (O and T symbols).
* Plots 5-6: non-expert markers where diffSignif == 0 (Oh and Th, the hollow
* versions). Plots 7-10: connecting lines without markers, solid for experts
* and dashed for non-experts. The legend lists keys 1-4 only.
* NOTE(review): ymlabel() is given a quoted string rather than a number/label
* pair - confirm that the 0.05 minor label renders as intended.
twoway (scatter y abs_disc if big_bins == 0 & expert == 1, msymbol(+)) ///
	(scatter y abs_disc if big_bins == 1 & expert == 1, msymbol(x)) ///
	(scatter y abs_disc if big_bins == 0 & expert == 0 & diffSignif == 1, msymbol(O)) ///
	(scatter y abs_disc if big_bins == 1 & expert == 0 & diffSignif == 1, msymbol(T)) ///
	(scatter y abs_disc if big_bins == 0 & expert == 0 & diffSignif == 0, msymbol(Oh)) ///
	(scatter y abs_disc if big_bins == 1 & expert == 0 & diffSignif == 0, msymbol(Th)) ///
	(connected y abs_disc if big_bins == 0 & expert == 1, lpattern(solid) msymbol(none)) ///
	(connected y abs_disc if big_bins == 0 & expert == 0, lpattern(dash) msymbol(none)) ///
	(connected y abs_disc if big_bins == 1 & expert == 1, lpattern(solid) msymbol(none)) ///
	(connected y abs_disc if big_bins == 1 & expert == 0, lpattern(dash) msymbol(none)), ///
	yscale(range(0 1)) ///
	legend(order(1 "Experts (Small Bins)"  2 "Experts (Large Bins)" 3 "Non-Experts (Small Bins)" 4 "Non-Experts (Large Bins)")) ///	
	ytitle("Share Reporting Discontinuity") ///
	xtitle("Discontinuity Magnitude") ///
	ymtick(0.05) ymlabel("0.05", labsize(small) angle(0)) ///
	ylabel(, angle(0))
graph export "${output}/experts_nonexperts_allbins_power_rd.pdf", as(pdf) replace

* Close all graph windows and the log opened at the top of the file.
graph close _all

log close
